# estimateModels.R
# created 2012 July 26

estimateModels <- function (
  varNames, 
  modelEnvir, 
  dfNames, 
  objectSuffix = '.IV', 
  estFun       = ivreg,
  saveX        = TRUE,
  subset       = NULL, 
  ...) {
  # This function estimates n models for each of k outcome variables.  It 
  # returns an environment containing n*k model objects.  [2012 07 26]
  #
  # For estimation, the function relies on estimateModel().  (See below.)  One
  # advantage of calling estimateModel() is that it passes a listwise-deleted
  # data frame with all of the relevant variables to the estimating function, 
  # e.g., to ivreg().  This is an advantage because it's often best not to let
  # functions like ivreg() handle missing-data problems themselves, and 
  # because functions like poly() cannot handle missing data at all.  
  # [2012 07 31]
  
  # varNames: character vector of outcome variables.  If it is empty, an 
  #   empty environment is returned.
  # modelEnvir: environment containing only the models to be estimated.  The 
  #   left-hand sides of the models may be wrong; they will be replaced with 
  #   the variables in varNames.  modelEnvir is probably created by 
  #   importIVModels().
  # dfNames: a character vector as long as varNames.  Specifies the data
  #   frames that are to be used when estimating models for each outcome.
  #   Each name is looked up with get().  These data frames may have been 
  #   created by makeIVEstimationDataFrame().
  # objectSuffix: string appended to the name of each object in the returned
  #   environment.
  # estFun: the estimating function.  It is handed on to estimateModel(), 
  #   which calls it with "formula", "data", and "x" arguments.
  # saveX: save X in the model object.  Makes summary() run faster.
  # subset: NULL, or a character string holding a logical expression (for 
  #   example, "race == 1") that selects the rows to keep.
  # ...: further arguments, handed on to estimateModel().
  
  # This function takes a character vector of outcome-variable names 
  # (varNames) and an environment in which models are stored (modelEnvir).  
  # The function will estimate all of the models in modelEnvir for each of the 
  # varNames.  It will place the estimated-model objects in objectEnvir, and 
  # it will return that environment.  [2012 07 26]
  
  # This function is meant to be used after importIVModels(), which creates
  # modelEnvir.  [2012 07 26]
  
  # WARNING: 
  # --Don't assume that an R environment holds objects in the desired
  #   order!  R makes no guarantee of this.  This matters, for example, when 
  #   using eapply().  [2012 07 26]
  #
  # --The subset argument works by paring down the data frame ("df") before it
  #   is passed to the regression command.  I am not yet sure that it works.
  #   [2012 10 06]
  
  # is.character() gives a single TRUE/FALSE even when subset carries more 
  # than one class, which a comparison against class(subset) does not.
  if (!is.null(subset) && !is.character(subset)) {
    stop("subset must be NULL or of the \"character\" class")
  }
  
  # Every outcome needs a data frame.  Without this check, a short dfNames
  # only fails later, inside get(), with an unhelpful message.
  if (length(dfNames) < length(varNames)) {
    stop("dfNames must name one data frame for each outcome variable in varNames")
  }
  
  objectEnvir <- new.env()
  
  # seq_along() rather than 1:length(): the latter counts 1, 0 when varNames
  # is empty.
  for (i in seq_along(varNames)) {
    df <- get(dfNames[i])
    
    # estimateModel() is called for its side effect: it assigns the fitted
    # model into objectEnvir.  The list returned by eapply() is not needed.
    eapply(
      modelEnvir, 
      function (x) estimateModel(
        model        = x, 
        depvarName   = varNames[i], 
        df           = df, 
        objectSuffix = objectSuffix, 
        objectEnvir  = objectEnvir,
        estFun       = estFun,
        saveX        = saveX,
        subset       = subset,
        ...))
  }
  return(objectEnvir)
}


estimateModel <- function (model, modelName, depvarName, df, objectSuffix, objectEnvir, estFun, saveX, subset, ...) {
  # Internal function.  Should not be called by user.  [2012 07 26]
  #
  # Estimates one model for one outcome variable and assigns the fitted 
  # object into objectEnvir under the name 
  # paste0(depvarName, objectSuffix, <modNum attribute of model>).
  #
  # model: a formula.  Only its right-hand side is used; the left-hand side
  #   is replaced with depvarName.  Its "modNum" attribute is used to build 
  #   the name of the output object.
  # modelName: not used by this function.
  # depvarName: name of the outcome variable; must be a column of df.
  # df: data frame holding the outcome and every variable in the model.
  # objectSuffix: string placed between depvarName and modNum in the name of
  #   the output object.
  # objectEnvir: environment into which the fitted model is assigned.
  # estFun: estimating function.  Called as 
  #   estFun(formula = , data = , x = saveX, ...).
  # saveX: passed to estFun as its "x" argument.
  # subset: NULL, or a single string holding a logical expression that is 
  #   evaluated within df to choose the rows to keep.  The first word of the
  #   string is taken to be the name of the subsetting variable; that 
  #   variable is removed from the data and from the formula after the rows 
  #   have been selected.
  # ...: further arguments passed to estFun.

  require(AER)      # for ivreg()
  
  # require() only returns FALSE when a package is missing.  Formula and 
  # stringr are used directly below, so stop here with a clear message.
  if (!require(Formula)) {   # for as.Formula()
    stop("The 'Formula' package is required but could not be loaded.")
  }
  if (!require(stringr)) {   # for str_wrap()
    stop("The 'stringr' package is required but could not be loaded.")
  }
  
  # Removes the term "varName" from a formula string.  The lookahead keeps 
  # the pattern from matching the front of a longer name (dropping "age" 
  # must not turn "agegroup" into "group"), and \Q...\E makes any dot in 
  # the name literal.  Like the pattern it replaces, this only removes 
  # terms that are preceded by a space, so a variable used inside a call 
  # such as poly(x, 2) is left alone.
  dropTerm <- function (fString, varName) {
    fString <- gsub(
      pattern     = paste0('\\+?\\s? \\Q', varName, '\\E(?![\\w.])'), 
      replacement = '', 
      x           = fString,
      perl        = TRUE)
    
    # Fix a problem that arises if one of the dropped variables was the 
    # first variable in the second stage of the formula.  [2013 08 02]
    sub('~ \\+ ', '~ ', fString)
  }
  
  if (is.null(attributes(model)$modNum)) {
    warning(str_wrap("The modNum attribute for a model hasn't been set.  Only one model will be returned for each dependent variable.", 68, exdent = 2))
  }
  if (!(depvarName %in% colnames(df))) {
    stop(paste0('The outcome variable ', depvarName, ' is not in the data frame.'))
  }
  outputNum         <- attributes(model)$modNum
  outputName        <- paste0(depvarName, objectSuffix, outputNum)
  formulaString     <- paste0(depvarName, ' ~ ', as.character(model)[3])
  formulaToEstimate <- as.Formula(formulaString)
  
  # Find the name of the variable on which we are subsetting.  [2012 10 06]
  subsetVarName <- NULL
  if (!is.null(subset)) {
    if (!is.character(subset) || length(subset) != 1) {
      stop("subset must be NULL or a single character string")
    }
    subsetVarName <- sub('^(\\w+).*', '\\1', subset)
    if (!(subsetVarName %in% colnames(df))) { 
      stop("The variable in the 'subset' argument doesn't seem to be in one of the data frames.  Check ANES.df and GSS.df.")
    }
  }
  
  # Omit missing data.  This lets me use poly() instead of cumbersome notation
  # when I have polynomial terms in regressions.  It also assures me that 
  # my estimation problems aren't due to clumsy NA handling.  [2012 07 30]
  # "drop = FALSE" keeps a one-variable selection from becoming a vector.
  varsNeeded <- unique(c(all.vars(formulaToEstimate), subsetVarName))
  df <- na.omit(df[, varsNeeded, drop = FALSE])
  if (nrow(df) <= 1) {
    stop(paste0('For outcome ', depvarName, ', listwise deletion created a data frame with 0 rows or 1 row.'))
  }
  
  # Subset the data frame.  [2012 10 06]
  if (!is.null(subset)) {
    # Parse the string first and then evaluate the result within df.  
    # Looking up the symbol "subset" inside df would pick up a column of 
    # that name, if there were one, instead of the argument.
    rowsToKeep <- eval(parse(text = subset), envir = df, enclos = environment())
    if (!is.logical(rowsToKeep)) {
      stop('rowsToKeep must be logical')
    }
    
    # Treat NA as "don't keep"; indexing rows with NA would create rows in
    # which every value is NA.
    df <- df[!is.na(rowsToKeep) & rowsToKeep, , drop = FALSE]
    if (nrow(df) <= 1) {
      stop(paste0('For outcome ', depvarName, ', the subset condition left a data frame with 0 rows or 1 row.'))
    }
  }
  
  # Remove subset variable itself from the data frame.  For example, if we  
  # have subset on the race variable, such that we only keep white
  # respondents, we now want to eliminate the race variable itself.  The 
  # "drop = FALSE" argument is to prevent single-column data frames from being
  # transformed into vectors.  [2014 01 03]
  if (!is.null(subsetVarName) && subsetVarName %in% colnames(df)) {
    df <- df[, !colnames(df) %in% subsetVarName, drop = FALSE]
  }

  # Remove subset variable from the formula, so that we don't try to estimate
  # a model that includes it.   [2015 12 10]
  if (!is.null(subsetVarName)) {
    formulaString <- dropTerm(formulaString, subsetVarName)
  }

  # Drop variables that don't vary. This sort of pre-estimation data cleaning 
  # isn't required by lm(), but AER::ivreg() does require it.  [2013 08 02]
  unvaryingCols <- vapply(
    df, 
    function (x) length(unique(x)) == 1, 
    logical(1))
  if (any(unvaryingCols)) {
    unvaryingColNames <- colnames(df)[unvaryingCols]
    
    # The outcome can't be dropped: it would stay in the formula while being
    # absent from the data.
    if (depvarName %in% unvaryingColNames) {
      stop(paste0('The outcome variable ', depvarName, ' does not vary within the data frame that is to be used for estimation.'))
    }
    df <- df[, !unvaryingCols, drop = FALSE]
  
    # Drop terms from the model.  [2013 08 02]
    for (varNameToDrop in unvaryingColNames) {
      formulaString <- dropTerm(formulaString, varNameToDrop)
      
      # Issue a warning if an unvarying "variable" was removed from the 
      # formula.  [2013 08 02]
      warning(
        str_wrap(
          paste0("For model " , outputNum, " (outcome \"", depvarName, "\"), \"", varNameToDrop, "\" has been removed as a predictor.  It did not vary within the data frame that is to be used for estimation."),
          68, 
          exdent = 2)
      )
    }     
  }
  
  # Estimate the model.
  formulaToEstimate <- as.Formula(formulaString)
  estimatedModel    <- do.call(estFun, args = list(formula = formulaToEstimate, data = df, x = saveX, ...))
  assign(outputName, estimatedModel, envir = objectEnvir)  
}


